#!/bin/bash

# Name of this script without its directory part; shown in the usage text.
me="${0##*/}"
# 0 = diagnostics on stderr are discarded, 1 = shown (switched on by -d).
debug=0
# Local HTML file to parse instead of downloading; empty means "download".
file=""

# User agent sent with the download request (can be overridden with -U).
# A separate little script could supply the user agent of the currently
# installed Firefox instead; that call is disabled:
#~ useragent="$(getua)" ||
useragent="Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0"

# Class name of the divs we are searching for.
query="tablecomponent"
# Page to download; the English one is the default (see -l for sv and fi).
URL="https://en.ilmatieteenlaitos.fi/weather-symbols"
# External programs this script cannot work without.
deps=(wget xmllint grep)

usage() {
[[ "$*" != "" ]] && echo "$*"
cat <<EOF

Download a web page that contains explanations for SmartSymbol weather symbols,
and parse it.
Dependencies: ${deps[@]}

Usage: $me [options] [> file]

Options:

-l str  Language: one of en, sv or fi (default: en)

-U str  Override user agent. Default:
        $useragent

-d      Debugging output

-f str  Parse local file. Invalidates other options. Mostly for debugging.

EOF
exit 1
}

__sep() {
  # Write a separator to stderr: an optional caption line built from all
  # arguments, followed by a full-width rule of '#' characters.
  if [[ -n "$*" ]]; then
    printf '############ %s\n' "$*" >&2
  fi
  printf '%s\n' "##################################################################" >&2
}

xpath() {
  # Evaluate an XPath expression against an HTML snippet.
  #   $1 - XPath expression
  #   $2 - HTML text to query
  # Writes xmllint's result to stdout; the exit status is xmllint's.

  # for xmllint to recognize broken html snippets as utf8-encoded:
  # (kept local so that calling this function does not clobber a global)
  local -r charset_hint='<meta charset="utf-8" />'

  # xmllint creates loads of error messages even when it's working correctly
  # if you really want to see these messages, remove '2>/dev/null'
  printf '%s\n' "$charset_hint$2" \
    | xmllint --html --nonet --xpath "$1" - 2>/dev/null
}

# Command-line parsing. -h has no arm of its own, so it lands in the
# catch-all together with unknown options and prints the usage text.
while getopts "f:l:U:hd" opt; do
  case "$opt" in
    f)
      # Parse a local file instead of downloading; it must be readable.
      if [[ -r "$OPTARG" ]]; then
        file="$OPTARG"
      else
        usage "cannot read $OPTARG"
      fi
      ;;
    l)
      # Pick the page for the requested language; "en" keeps the default URL.
      if [[ "$OPTARG" == "en" ]]; then
        :
      elif [[ "$OPTARG" == "sv" ]]; then
        URL="https://sv.ilmatieteenlaitos.fi/vadersymbolerna"
      elif [[ "$OPTARG" == "fi" ]]; then
        URL="https://www.ilmatieteenlaitos.fi/saamerkkien-selitykset"
      else
        usage "Wrong language $OPTARG"
      fi
      ;;
    U) useragent="$OPTARG" ;;
    d) debug=1 ;;
    *) usage ;;
  esac
done

# Keep a duplicate of the real stderr on fd 3: unless -d was given, stderr is
# silenced right below, and fatal errors must still reach the user.
exec 3>&2
if [[ "$debug" == 0 ]]; then
  exec 2>/dev/null
fi
echo "User Agent: $useragent" >&2

# simple dependency check
for dep in "${deps[@]}"; do
  # usage writes to stdout, so the reason is visible even without -d.
  type -f "$dep" >/dev/null || usage "Missing dependency: $dep"
done

__sep "wgetting $URL or file"
# Explicit if/else on purpose: written as "A && B || C", a failed download
# would fall through to C and try to read the (empty) file name.
if [[ "$file" == "" ]]; then
  if ! html="$(wget -U "$useragent" -O - "$URL")"; then
    echo "$me: could not download $URL" >&3
    exit 1
  fi
else
  if ! html="$(<"$file")"; then
    echo "$me: could not read $file" >&3
    exit 1
  fi
fi
# The extra handle is only needed for the fatal errors above.
exec 3>&-

__sep "getting good stuff with xmllint"
# Narrow the document down to the div whose id is "main-content".
html="$(xpath "//div[@id=\"main-content\"]" "$html")"

# Fill table[1..n]: grep emits one line per whole-word occurrence of $query in
# the HTML, and for the i-th occurrence the i-th div whose class attribute
# equals $query is extracted. Each extracted div is dumped to stderr.
# NOTE(review): the occurrence count is only assumed to match the number of
# such divs - confirm against the actual page.
table=()
i=0
while read -r line; do
  # Only lines that are exactly the class name count as a hit.
  [[ "$line" == "$query" ]] || continue
  i=$((i + 1))
  table[i]="$(xpath "(//div[@class=\"$query\"])[$i]" "$html")"
  __sep "table $i"
  echo "${table[i]}" >&2
done < <(grep -o -w "$query" <<<"$html")

__sep "Normal output (end result)"
echo "Let's just assume that the first table is the only one we're interested in." >&2
# Only table[1] is processed. To handle every table, wrap everything from the
# two read loops down to the end of the for loop in
#   for ((i=1;i<=${#table[@]};i++)); do ... done
i=1
longest=0

# Read the alt texts and the image sources of the table, one array element per
# non-empty output line. Reading line by line (instead of word-splitting an
# unquoted command substitution) leaves IFS untouched and never glob-expands
# the page content.
text=()
icon=()
while IFS= read -r line || [[ -n "$line" ]]; do
  [[ -n "$line" ]] && text+=("$line")
done < <(xpath "//table/tbody/tr/td/img/@alt" "${table[i]}")
while IFS= read -r line || [[ -n "$line" ]]; do
  [[ -n "$line" ]] && icon+=("$line")
done < <(xpath "//table/tbody/tr/td/img/@src" "${table[i]}")

# Entries are paired by index, so this assumes every image carries both
# attributes. Each line is presumably of the form  name="value".
for ((j = 0; j < ${#text[@]}; j++)); do
  # Icon id: the text between the first and the last double quote, reduced to
  # the file name without directory and without extension.
  s1="${icon[j]%\"*}"
  s1="${s1#*\"}"
  s1="${s1##*/}"
  s1="${s1%.*}"
  # Description: the text between the first and the last double quote.
  s2="${text[j]%\"*}"
  s2="${s2#*\"}"
  # Track the length of the longest description.
  if ((${#s2} > longest)); then
    longest=${#s2}
  fi
  printf "%s:%s\n" "$s1" "$s2"
done
echo "longest:$longest"
